This analysis performs differential abundance analysis on lipid profiles from maize leaf samples across multiple experimental factors:
The analysis uses limma-voom to identify lipids that respond to:
Fold change thresholds:
library(edgeR)
library(ggplot2)
library(limma)
library(dplyr)
library(tidyr)
library(forcats)
library(ggrepel)
library(cowplot)
library(ggpubr)
library(ggtext)
# Internal standards and odd-chain lipids to exclude
internal_standards <- c(
"CUDA",
"LPE_17_1",
"Sphingosine_17_1",
"PC_25_0",
"DG_18_1",
"PG_34_0",
"DG_12_0",
"TG_17_0",
"LPC_17_0",
"PE_34",
"Cholesterol"
)
# exclude the odd carbon lipid
odd_carbon <- c( "PG_17_0", "SM_35_1","TG_57_6")
to_exclude <- c(internal_standards,odd_carbon)
# Load MS-Dial raw data with new internal standard integration
lipid_csv <- file.path(paths$data, "PSU_RawData_MSDial_NewStdInt_240422.csv")
raw <- read.csv(lipid_csv, na.strings = c("", "#N/A", "NA", "Inf"))
# Remove internal standards and quality control samples
to_exclude <- to_exclude[to_exclude %in% colnames(raw)]
raw <- raw %>%
dplyr::select(-all_of(to_exclude)) %>%
filter(!grepl("Methanol", sampleID)) %>%
filter(!grepl("Pool", sampleID))
cat("Loaded", nrow(raw), "samples\n")
## Loaded 43 samples
# Load plant phenotypic data
plant_csv <- file.path(paths$data, "22_NCS_PSU_LANGEBIO_FIELDS_PSU_P_field.csv")
psu <- read.csv(plant_csv) %>%
dplyr::rename(Genotype = "Who.What", rowid = "P22.") %>%
filter(Genotype %in% c("CTRL", "INV4M")) %>%
droplevels()
# Load RNA-seq metadata
sampleInfo <- read.csv(file.path(paths$data, 'inv4mRNAseq_metadata.csv')) %>%
dplyr::rename(Genotype = genotype) %>%
dplyr::rename(rowid = row)
# Merge metadata
sampleInfo <- psu %>%
dplyr::select(rowid, Rep, Plot_Row, Plot_Column, DTA, DTS) %>%
inner_join(sampleInfo) %>%
filter(!is.na(DTA))
# Create leaf position groups
sampleInfo$leaf_group <- NA
sampleInfo$leaf_group[sampleInfo$leaf_tissue < 3] <- "apical"
sampleInfo$leaf_group[sampleInfo$leaf_tissue > 2] <- "basal"
# Standardize sample IDs
sampleInfo$tube <- gsub("L|R", "S", sampleInfo$tube, perl = TRUE)
sampleInfo$rowid <- sampleInfo$row
# mass spectrometry injection order
ms_order<- read.csv(file.path(paths$data, "PSU-PHO22_ms_order.csv"))
ms_order$tube <- gsub("L|R","S",ms_order$top_tag, perl =TRUE)
sampleInfo$row
## [1] 3056 3056 3056 3056 3059 3059 3059 3059 3080 3080 3080 3080 3083 3083 3083
## [16] 3083 3092 3092 3092 3092 3095 3095 3095 3095 3113 3113 3113 3113 3131 3131
## [31] 3131 3131 4080 4080 4080 4080 4083 4083 4083 4083 4104 4104 4104 4104 4107
## [46] 4107 4107 4107 4113 4113 4113 4113 4122 4122 4122 4122 4125 4125 4125 4125
## [61] 4131 4131 4131 4131
sampleInfo <- sampleInfo%>%
dplyr::right_join(ms_order) %>% distinct()
cat("Sample info loaded for", nrow(sampleInfo), "samples\n")
## Sample info loaded for 53 samples
# Extract tube ID from raw data
raw$tube <- gsub(".*_|.raw", "", raw$sampleID, perl = TRUE)
raw$tube <- gsub("L|R", "S", raw$tube, perl = TRUE)
# Merge phenotypic data with lipid measurements
pheno <- sampleInfo %>%
dplyr::select(rowid, tube, Rep, Plot:Plot_Row, Treatment:leaf_tissue, leaf_group) %>%
inner_join(
raw %>%
dplyr::select(tube, DGDG_34_0:TG_58_5)
) %>%
mutate(block = as.factor(Rep)) %>%
droplevels() %>%
pivot_wider(
names_from = Rep,
values_from = Rep,
names_prefix = "block_",
values_fn = function(x) 1,
values_fill = 0
) %>%
dplyr::select(rowid, tube:Plot_Row, block, block_6:block_12, everything()) %>%
mutate(Treatment = factor(Treatment, levels = c("High_P", "Low_P")))
# Simplify treatment labels
levels(pheno$Treatment) <- c("+P", "-P")
# Remove problematic sample
pheno <- pheno[-29, ] # mislabelled for lipids?
cat("Final dataset:", nrow(pheno), "samples\n")
## Final dataset: 42 samples
# Extract lipid measurements matrix
m <- pheno %>%
dplyr::select(DGDG_34_0:TG_58_5) %>%
as.matrix()
# Define lipid head groups
head_group <- c(
DG = "neutral",
DGDG = "glycolipid",
DGGA = "glycolipid",
LPC = "phospholipid",
LPE = "phospholipid",
MGDG = "glycolipid",
PC = "phospholipid",
PE = "phospholipid",
PG = "phospholipid",
PI = "phospholipid",
SQDG = "glycolipid",
TG = "neutral"
)
# Create lipid species annotation
sp <- data.frame(
colname = colnames(m),
class = gsub("_.*", "", colnames(m), perl = TRUE)
)
sp$head_group <- head_group[sp$class]
# Summary of lipid classes
cat("\nLipid class distribution:\n")
##
## Lipid class distribution:
print(with(sp, table(head_group, class)))
## class
## head_group DG DGDG DGGA LPC LPE MGDG PC PE PG PI SQDG TG
## glycolipid 0 12 9 0 0 8 0 0 0 0 9 0
## neutral 22 0 0 0 0 0 0 0 0 0 0 25
## phospholipid 0 0 0 6 3 0 19 12 6 4 0 0
Critical step: The counts matrix comes from pheno, but
we need to ensure the sample metadata in the DGEList matches the
processed pheno data, especially for Treatment levels which
were renamed from “High_P”/“Low_P” to “+P”/“-P”.
# Prepare count matrix
counts <- pheno[, colnames(m)] %>%
as.matrix() %>%
t()
# Visualize data distribution
hist(counts, main = "Distribution of Lipid Abundances", xlab = "Abundance")
# Set column names to match sample IDs
colnames(counts) <- pheno$tube
cat("\nCount matrix dimensions:", dim(counts), "\n")
##
## Count matrix dimensions: 135 42
cat("Lipids:", nrow(counts), "\n")
## Lipids: 135
cat("Samples:", ncol(counts), "\n")
## Samples: 42
# Extract sample names from counts matrix
sampleNames <- pheno$tube
# Verify all sample names are present
cat("\nAll samples in counts?", all(sampleNames %in% sampleInfo$tube), "\n")
##
## All samples in counts? TRUE
# Match sampleInfo to the order of samples in counts matrix
sampleInfo_matched <- sampleInfo[match(sampleNames, sampleInfo$tube), ]
# CRIionAL: Update Treatment levels to match pheno
# The pheno object has Treatment renamed to "+P"/"-P"
sampleInfo_matched$Treatment <- factor(
pheno$Treatment[match(sampleInfo_matched$tube, pheno$tube)],
levels = c("+P", "-P")
)
cat("\nTreatment levels after update:\n")
##
## Treatment levels after update:
print(levels(sampleInfo_matched$Treatment))
## [1] "+P" "-P"
print(table(sampleInfo_matched$Treatment))
##
## +P -P
## 15 27
# Create corrected matrix for plotting
sampleInfo$leaf_tissue_c <- scale(sampleInfo$leaf_tissue, center = TRUE, scale = FALSE)
filtered_sample_idx <- which(colnames(counts) %in% sampleInfo$tube)
filtered_covariates <- sampleInfo[
filtered_sample_idx,
c("Plot_Row","injection_order")]
filtered_metadata <- sampleInfo[filtered_sample_idx,]
#
# corrected_matrix <- removeBatchEffect(
# counts,
# covariates = filtered_covariates,
# design = model.matrix(
# ~ leaf_tissue_c * Treatment + Genotype * Treatment +
# leaf_tissue_c * Genotype,
# data = filtered_metadata
# )
# )
#
# corrected_matrix <- corrected_matrix - min( corrected_matrix)
# Use corrected_matrix for:
# - PCA plots
# - Heatmaps
# - Boxplots of individual metabolites
# - Any visualization where drift would obscure biology
# But use original metabolite_matrix with full design for statistics
# design_full <- model.matrix(
# ~ Plot_Row + injection_order +
# leaf_tissue_c * Treatment + Genotype * Treatment +
# leaf_tissue_c * Genotype,
# data = metadata
# )
#
# fit <- lmFit(metabolite_matrix, design_full)
# fit <- eBayes(fit)
# Create gene annotation
genes <- data.frame(gene = rownames(counts))
# Create DGEList with matched sample information
y <- DGEList(counts = counts, samples = sampleInfo_matched )
# Create sample groups from Treatment and Genotype
y$group <- interaction(y$samples$Treatment, y$samples$Genotype)
cat("\nDGEList created successfully\n")
##
## DGEList created successfully
cat("Groups:", levels(y$group), "\n")
## Groups: +P.CTRL -P.CTRL +P.INV4 -P.INV4
cat("\nLibrary size summary:\n")
##
## Library size summary:
print(summary(y$samples$lib.size))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.231e+10 4.175e+10 5.655e+10 5.876e+10 7.080e+10 1.110e+11
cat("\nLow count samples:\n")
##
## Low count samples:
print(table(y$samples$lowCount))
## < table of extent 0 >
# Keep lipids with sufficient abundance
# Calculate Total Ion Current fractions ~ molar fractions
ion_fraction <- sweep(y$counts, 2, colSums(y$counts), FUN = "/")
hist(log10(ion_fraction))
keep <- (rowMeans(ion_fraction==0)) < 0.20
sum(!keep)
## [1] 3
y_filtered <- y[keep, ]
{
cat("\nabundance filtering:\n")
cat(" Genes before:", nrow(y), "\n")
cat(" Genes after:", nrow(y_filtered), "\n")
cat(" Genes removed:", sum(!keep), "\n")
}
##
## abundance filtering:
## Genes before: 135
## Genes after: 132
## Genes removed: 3
CRIionAL: We need to use the ORIGINAL y_filtered object for the metadata to ensure all samples are included in the MDS visualization, but use the coordinates from the MDS computed on all samples.
y_filtered$samples$lowCount <- y_filtered$samples$lib.size < 3e10
# Remove low quality samples
y_filtered_bySample <- y_filtered[, !y_filtered$samples$lowCount]
{
cat("\nLow quality libraries:\n")
table(y_filtered$samples$lowCount)
cat("\nSamples after filtering:\n")
cat(" Retained:", ncol(y_filtered_bySample), "\n")
cat(" Removed:", sum(y_filtered$samples$lowCount), "\n")
}
##
## Low quality libraries:
##
## Samples after filtering:
## Retained: 38
## Removed: 4
# Extract sample metadata from filtered object
d <- y_filtered_bySample$samples
# Ensure factors are properly set
d$Treatment <- factor(d$Treatment, levels = c("+P", "-P"))
d$Rep <- factor(d$Rep)
# Lipid Class Composition Analysis
# =================================
## Calculate class-level abundances from normalized data
# Extract normalized counts (ion fraction scaled)
norm_counts <- ion_fraction * 1e9
# Create metadata for normalized samples
norm_metadata <- d %>%
dplyr::filter(tube %in% colnames(norm_counts))
# Transpose normalized counts and merge with lipid annotations
lipid_abundance <- norm_counts %>%
t() %>%
as.data.frame() %>%
dplyr::mutate(tube = rownames(.)) %>%
tidyr::pivot_longer(
cols = -tube,
names_to = "response",
values_to = "abundance"
) %>%
dplyr::inner_join(sp, by = c("response" = "colname")) %>%
dplyr::inner_join(norm_metadata, by = "tube")
## Create color palette with variations within each head group
# Define base colors for each head group
base_colors <- c(
glycolipid = "darkblue",
phospholipid = "darkred",
neutral = "darkorange"
)
# Function to generate color variations (lighter/darker shades)
generate_color_variations <- function(base_color, n) {
if (n == 1) return(base_color)
# Use colorRampPalette to create gradient from lighter to darker
colorRampPalette(c(
colorspace::lighten(base_color, 0.4),
base_color,
colorspace::darken(base_color, 0.3)
))(n)
}
# Get classes by head group from sp annotation
classes_by_headgroup <- sp %>%
dplyr::select(class, head_group) %>%
dplyr::distinct() %>%
dplyr::arrange(head_group, class)
# Order classes by head group for consistent display
class_order <- classes_by_headgroup %>%
dplyr::pull(class)
# Generate colors for each head group
class_colors <- c()
for (hg in names(base_colors)) {
# Get classes in this head group
hg_classes <- classes_by_headgroup %>%
dplyr::filter(head_group == hg) %>%
dplyr::pull(class)
# Generate color variations
if (length(hg_classes) > 0) {
hg_colors <- generate_color_variations(base_colors[hg], length(hg_classes))
names(hg_colors) <- hg_classes
class_colors <- c(class_colors, hg_colors)
}
}
# Reorder to match class_order
class_colors <- class_colors[class_order]
cat("\nGenerated", length(class_colors), "color variations\n")
##
## Generated 12 color variations
cat("Colors by head group:\n")
## Colors by head group:
print(table(classes_by_headgroup$head_group))
##
## glycolipid neutral phospholipid
## 4 2 6
# Define custom green to orange palette for leaf positions
green_to_orange <- c(
"#00954A", # Spanish Green (Leaf 1)
"#A4DF56", # Inchworm (Leaf 2)
"#D7E23C", # Pear (Leaf 3)
"#FF9A1F" # Crayola's Bright Yellow/Orange (Leaf 4)
)
## Plot 1: Mean composition by Leaf position - grouped bars by leaf within each class
leaf_composition <- lipid_abundance %>%
dplyr::group_by(tube, Treatment, leaf_tissue, class) %>%
dplyr::summarise(class_sum = sum(abundance), .groups = "drop") %>%
dplyr::group_by(tube, Treatment, leaf_tissue) %>%
dplyr::mutate(class_pct = 100 * class_sum / sum(class_sum)) %>%
dplyr::ungroup() %>%
dplyr::group_by(Treatment, leaf_tissue, class) %>%
dplyr::summarise(
mean_pct = mean(class_pct),
se_pct = sd(class_pct) / sqrt(n()),
.groups = "drop"
)
leaf_composition$class <- factor(leaf_composition$class, levels = class_order)
# Create leaf position factor
leaf_composition <- leaf_composition %>%
dplyr::mutate(
leaf_position = factor(
leaf_tissue,
levels = c(1, 2, 3, 4),
labels = c("1", "2", "3", "4")
)
)
# Add head group for faceting
leaf_composition <- leaf_composition %>%
dplyr::inner_join(
sp %>% dplyr::select(class, head_group) %>% dplyr::distinct(),
by = "class"
)
# Plot with lipid classes on x-axis, grouped by leaf position, log Y-scale
plot1_composition <- leaf_composition %>%
ggplot(aes(x = class, y = mean_pct, fill = leaf_position)) +
geom_col(color = "black", linewidth = 0.2, position = position_dodge(width = 0.9)) +
geom_errorbar(
aes(ymin = mean_pct - se_pct, ymax = mean_pct + se_pct),
position = position_dodge(width = 0.9),
width = 0.25,
linewidth = 0.5
) +
scale_fill_manual(
name = "Leaf",
values = green_to_orange
) +
scale_y_log10(
breaks = c(0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 40),
labels = c("0.05", "0.1", "0.2", "0.5", "1", "2", "5", "10", "20", "40")
) +
labs(
y = "Ion %"
) +
facet_grid(rows = vars(Treatment), cols = vars(head_group),
scales = "free_x", space = "free_x") +
theme_classic2(base_size = 14) +
theme(
axis.title.x = element_blank(),
axis.text.x = element_text(angle = 0, hjust = 0.5, size = 13, face = "bold"),
strip.background = element_blank(),
strip.text.x = element_text(angle = 0, size = 25, face = "bold"),
strip.text.y = element_text(angle = 0, size = 25, face = "bold"),
legend.position = "none"
)
## Plot 2: Treatment effect (-P vs +P) by leaf stage
# Calculate mean and SE per Treatment-Leaf-Class
class_abundance_summary <- lipid_abundance %>%
dplyr::group_by(tube, Treatment, leaf_tissue, class) %>%
dplyr::summarise(class_sum = sum(abundance), .groups = "drop") %>%
dplyr::group_by(Treatment, leaf_tissue, class) %>%
dplyr::summarise(
mean_abundance = mean(class_sum),
se_abundance = sd(class_sum) / sqrt(n()),
n_samples = n(),
.groups = "drop"
)
# Pivot to get +P and -P side by side
treatment_comparison <- class_abundance_summary %>%
tidyr::pivot_wider(
names_from = Treatment,
values_from = c(mean_abundance, se_abundance, n_samples)
) %>%
dplyr::mutate(
# Calculate log2 fold change: log2(-P / +P)
log2fc = log2(`mean_abundance_-P` / `mean_abundance_+P`),
# Propagate error using delta method
# For log2(Y/X): SE ≈ 1/ln(2) × sqrt((SE_Y/Y)^2 + (SE_X/X)^2)
se_log2fc = (1/log(2)) * sqrt(
(`se_abundance_-P` / `mean_abundance_-P`)^2 +
(`se_abundance_+P` / `mean_abundance_+P`)^2
)
) %>%
dplyr::mutate(
leaf_position = factor(
leaf_tissue,
levels = c(1, 2, 3, 4),
labels = c("1", "2", "3", "4")
)
)
treatment_comparison$class <- factor(
treatment_comparison$class,
levels = class_order
)
# Add head group
treatment_comparison <- treatment_comparison %>%
dplyr::inner_join(
sp %>% dplyr::select(class, head_group) %>% dplyr::distinct(),
by = "class"
)
# Plot treatment effect
plot2_treatment_effect <- treatment_comparison %>%
ggplot(aes(x = class, y = log2fc, fill = leaf_position)) +
geom_col(color = "black", linewidth = 0.2, position = position_dodge(width = 0.9)) +
geom_errorbar(
aes(ymin = log2fc - se_log2fc, ymax = log2fc + se_log2fc),
position = position_dodge(width = 0.9),
width = 0.25,
linewidth = 0.5
) +
scale_fill_manual(
name = "Leaf",
values = green_to_orange
) +
labs(
y = expression(log[2]~"FC (-P / +P)")
) +
facet_grid(cols = vars(head_group), scales = "free_x", space = "free_x") +
theme_classic2(base_size = 14) +
theme(
axis.title.x = element_blank(),
axis.text.x = element_text(angle = 0, hjust = 0.5, size = 13, face = "bold"),
strip.background = element_blank(),
strip.text.x = element_blank(),
)
## Plot 3: Relative change with paired fold changes per plant
# Get sample-level abundances with plant ID
class_abundance_samples <- lipid_abundance %>%
dplyr::group_by(tube, NCSU_RNA_plant, Treatment, leaf_tissue, class) %>%
dplyr::summarise(class_sum = sum(abundance), .groups = "drop")
# Pivot wide to get all leaves per plant
class_abundance_wide <- class_abundance_samples %>%
dplyr::select(-tube) %>%
tidyr::pivot_wider(
names_from = leaf_tissue,
values_from = class_sum,
names_prefix = "leaf_"
)
# Fill missing values with treatment-class mean for that leaf
class_abundance_filled <- class_abundance_wide %>%
dplyr::group_by(Treatment, class) %>%
dplyr::mutate(
leaf_1 = ifelse(is.na(leaf_1), mean(leaf_1, na.rm = TRUE), leaf_1),
leaf_2 = ifelse(is.na(leaf_2), mean(leaf_2, na.rm = TRUE), leaf_2),
leaf_3 = ifelse(is.na(leaf_3), mean(leaf_3, na.rm = TRUE), leaf_3),
leaf_4 = ifelse(is.na(leaf_4), mean(leaf_4, na.rm = TRUE), leaf_4)
) %>%
dplyr::ungroup()
# Calculate log2 fold change per plant (keeping leaf 1 = 0 for symmetry)
class_relative_per_plant <- class_abundance_filled %>%
dplyr::mutate(
leaf1_log2fc = 0,
leaf2_log2fc = log2(leaf_2 / leaf_1),
leaf3_log2fc = log2(leaf_3 / leaf_1),
leaf4_log2fc = log2(leaf_4 / leaf_1)
) %>%
dplyr::select(NCSU_RNA_plant, Treatment, class,
leaf1_log2fc, leaf2_log2fc, leaf3_log2fc, leaf4_log2fc) %>%
tidyr::pivot_longer(
cols = starts_with("leaf"),
names_to = "leaf_position",
values_to = "log2fc"
)
# Calculate mean and SE
class_leaf_ratio_treatment <- class_relative_per_plant %>%
dplyr::group_by(Treatment, leaf_position, class) %>%
dplyr::summarise(
mean_log2fc = mean(log2fc, na.rm = TRUE),
se_log2fc = sd(log2fc, na.rm = TRUE) / sqrt(n()),
.groups = "drop"
) %>%
dplyr::mutate(
leaf_position = factor(
leaf_position,
levels = c("leaf1_log2fc", "leaf2_log2fc", "leaf3_log2fc", "leaf4_log2fc"),
labels = c("1", "2", "3", "4")
)
)
class_leaf_ratio_treatment$class <- factor(
class_leaf_ratio_treatment$class,
levels = class_order
)
# Add head group
class_leaf_ratio_treatment <- class_leaf_ratio_treatment %>%
dplyr::inner_join(
sp %>% dplyr::select(class, head_group) %>% dplyr::distinct(),
by = "class"
)
# Plot with log2FC
plot3_leaf_relative <- class_leaf_ratio_treatment %>%
ggplot(aes(x = class, y = mean_log2fc, fill = leaf_position)) +
geom_col(color = "black", linewidth = 0.2, position = position_dodge(width = 0.9)) +
geom_errorbar(
aes(ymin = mean_log2fc - se_log2fc, ymax = mean_log2fc + se_log2fc),
position = position_dodge(width = 0.9),
width = 0.25,
linewidth = 0.5
) +
scale_fill_manual(
name = "Leaf",
values = green_to_orange
) +
labs(
y = expression(log[2]~"FC vs Leaf 1")
) +
facet_grid(rows = vars(Treatment), cols = vars(head_group),
scales = "free_x", space = "free_x") +
theme_classic2(base_size = 14) +
theme(
axis.title.x = element_blank(),
axis.text.x = element_text(angle = 0, hjust = 0.5, size = 13, face = "bold"),
strip.background = element_blank(),
strip.text.x = element_blank(),
strip.text.y = element_text(angle = 0, size = 25, face = "bold"),
legend.position = "none"
)
# Combine all three plots
final_combined <- plot_grid(
plot1_composition,
plot2_treatment_effect,
plot3_leaf_relative,
ncol = 1,
labels = c("A", "B", "C"),
label_size = 25,
rel_heights = c(1, 0.5, 1),
align = "v",
axis = "lr"
)
# Save combined plot
ggsave(
file.path(paths$figures, "lipid_composition_combined_final.svg"),
final_combined,
width = 10,
height = 10,
dpi = 150
)
cat("\nFinal combined plot created and saved\n")
##
## Final combined plot created and saved
print(final_combined)
# MDS with all samples (before library size filtering)
mds_all <- plotMDS(y_filtered, pch = 21, labels=y$samples$tube,plot = TRUE)
# Histogram of library sizes
hist(
y$samples$lib.size / 1e6,
main = "Library Size Distribution",
xlab = "Library Size (millions of reads)",
breaks = 20
)
{
cat("\nLibrary size summary:\n")
print(summary(y_filtered$samples$lib.size))
cat("\nSamples with lib.size < 20 million:",
sum(y_filtered$samples$lib.size < 2e7), "\n")
}
##
## Library size summary:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.231e+10 4.175e+10 5.655e+10 5.876e+10 7.080e+10 1.110e+11
##
## Samples with lib.size < 20 million: 0
mds_qc_data <- y_filtered$samples %>%
mutate(
dim1 = mds_all$x,
dim2 = mds_all$y,
lib_size = lib.size / 1e10
)
# Plot MDS colored by MS injection order
ggplot( mds_qc_data, aes(x = dim1, y = dim2, color =injection_order )) +
geom_point(size = 3) +
scale_color_viridis_c(option = "plasma") +
labs(
x = paste0("Dim1 (", round(100 * mds_all$var.explained[1]), "%)"),
y = paste0("Dim2 (", round(100 * mds_all$var.explained[2]), "%)"),
color = "Injection Order"
) +
theme_classic2(base_size = 15)
MDS reveals sources of variation in gene abundance across samples. Dimensions 3 and 4 are extracted from eigenvectors scaled by square root of variance explained.
mds <- plotMDS(
y,
pch = 21,
label = y$samples$side_tag,
plot = FALSE
)
# Store MDS coordinates in sample data
d <- y_filtered$samples
d$dim1 <- mds$x
d$dim2 <- mds$y
d$dim3 <- mds$eigen.vectors[, 3] * sqrt(mds$var.explained[3])
d$dim4 <- mds$eigen.vectors[, 4] * sqrt(mds$var.explained[4])
# Prepare factors for plotting
d$Treatment <- factor(d$Treatment)
d$Genotype <- factor(d$Genotype)
# Variance explained by each dimension
tibble(
dimension = paste("Dim", 1:4),
var_explained = round(mds$var.explained[1:4], 4)
)
## # A tibble: 4 × 2
## dimension var_explained
## <chr> <dbl>
## 1 Dim 1 0.377
## 2 Dim 2 0.14
## 3 Dim 3 0.101
## 4 Dim 4 0.0784
Exploring which experimental factors drive the main dimensions of variation.
# Treatment
p1 <- ggplot(d, aes(x = dim2, y = dim3, color = Treatment)) +
geom_point(size = 3) +
theme_classic2(base_size = 15) +
ggtitle("Treatment")
# Row position
p2 <- ggplot(d, aes(x = dim2, y = dim3, color = injection_order, shape = Treatment)) +
geom_point(size = 3) +
theme_classic2(base_size = 15) +
ggtitle("Injection order")
# Collection time
p3 <- ggplot(d, aes(x = dim2, y = dim3, color = decimal_time)) +
geom_point(size = 3) +
theme_classic2(base_size = 15) +
ggtitle("Collection Time")
# Collector
p4 <- ggplot(d, aes(x = dim2, y = dim3, color = COLLECTOR)) +
geom_point(size = 3) +
theme_classic2(base_size = 15) +
ggtitle("Collector")
# Genotype
p5 <- ggplot(d, aes(x = dim2, y = dim3, color = Genotype)) +
geom_point(size = 3) +
theme_classic2(base_size = 15) +
ggtitle("Genotype")
# Leaf tissue
p6 <- d %>%
mutate(leaf = factor(leaf_tissue)) %>%
ggplot(aes(x = dim2, y = dim3, color = leaf)) +
geom_point(size = 3) +
theme_classic2(base_size = 15) +
ggtitle("Leaf Tissue")
ggarrange(p1, p2, p3, p4, p5, p6, ncol = 3, nrow = 2)
lipid_MDS_23 <- d %>%
mutate(leaf = factor(leaf_tissue)) %>%
ggplot(aes(x = dim2, y = dim3)) +
ggtitle("Lipid MDS") +
xlab(paste0("dim2 (", round(100 * mds$var.explained[2]), "%)")) +
ylab(paste0("dim3 (", round(100 * mds$var.explained[3]), "%)")) +
geom_point(aes(fill = leaf, shape = Treatment), size = 4) +
scale_fill_manual(values= green_to_orange) +
scale_shape_manual(values = c(21, 25)) +
guides(
shape = guide_legend( title = element_blank(),
override.aes = list(fill= "black")),
fill = guide_legend(
title = "Leaf",
order = 2,
override.aes = list(geom = "point", shape = 22, size = 7, reverse = TRUE)
)
) +
theme_classic2(base_size = 30) +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank(), # Remove x-axis ticks
axis.text.y = element_blank(), # Remove y-axis labels
axis.ticks.y = element_blank(), # Remove y-axis ticks
legend.spacing = unit(0, "line"),
legend.box.spacing = unit(0, "in"),
legend.background = element_rect(fill = "transparent")
#legend.position = c(0.2, 0.17)
)
# Save growth curve plots
saveRDS(lipid_MDS_23 , file.path(paths$intermediate, "lipid_MDS_23.rds"))
cat("MDS plots saved to output/\n")
## MDS plots saved to output/
lipid_MDS_23
The third dimension separates by genotype.
# Set up genotype labels with italic formatting
labels <- c("CTRL", "*Inv4m*")
names(labels) <- c("CTRL", "INV4")
d %>%
mutate(leaf = factor(leaf_tissue)) %>%
ggplot(aes(x = dim3, y = dim4, fill = leaf, shape = Treatment)) +
xlab(paste0("dim3 (", round(100 * mds$var.explained[3]), "%)")) +
ylab(paste0("dim4 (", round(100 * mds$var.explained[4]), "%)")) +
geom_point(size = 4) +
scale_fill_manual(values= green_to_orange) +
scale_shape_manual(values = c(21, 25)) +
guides(
shape = guide_legend( title = element_blank(),
override.aes = list(fill= "black")),
fill = guide_legend(
title = "Leaf",
order = 2,
override.aes = list(geom = "point", shape = 22, size = 7, reverse = TRUE)
)
) +
theme_classic2(base_size = 25) +
theme(
legend.box = "horizontal",
legend.position = c(0.9, 0.8),
legend.text = element_markdown(),
legend.spacing = unit(0, "line"),
legend.box.spacing = unit(0, "line")
)
# Calculate correlations and p-values
mds_cor_results <- tibble(
dimension = c("Dim2", "Dim3", "Dim4"),
factor = c("Treatment", "Leaf tissue","Leaf tissue"),
correlation = c(
cor(d$dim2, as.numeric(d$Treatment)),
cor(d$dim3, as.numeric(d$leaf_tissue)),
cor(d$dim4, as.numeric(d$leaf_tissue))
),
p_value = c(
cor.test(d$dim2, as.numeric(d$Treatment))$p.value,
cor.test(d$dim3, d$leaf_tissue)$p.value,
cor.test(d$dim4, d$leaf_tissue)$p.value
)
) %>%
mutate(
adj_p_value = p.adjust(p_value, method = "fdr"),
correlation = round(correlation, 3),
p_value = signif(p_value, 3),
adj_p_value = signif(adj_p_value, 3)
)
mds_cor_results
## # A tibble: 3 × 5
## dimension factor correlation p_value adj_p_value
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Dim2 Treatment 0.427 0.00478 0.00478
## 2 Dim3 Leaf tissue -0.556 0.000132 0.000396
## 3 Dim4 Leaf tissue 0.517 0.000461 0.000692
CRIionAL: Before creating the design matrix, ensure Treatment levels are correctly set to match the original factor levels for proper interpretation.
# --- Verify experimental design variables ---
# Check that Plot_Column is present in the metadata
cat("\nPlot_Column present?", "Plot_Column" %in% colnames(d), "\n")
##
## Plot_Column present? TRUE
cat("Plot_Column values:\n")
## Plot_Column values:
print(table(d$Plot_Column))
##
## 3 4 5 6
## 8 13 6 15
# Check that Plot_Row is present in the metadata
cat("\nPlot_Row present?", "Plot_Row" %in% colnames(d), "\n")
##
## Plot_Row present? TRUE
cat("Plot_Row values:\n")
## Plot_Row values:
print(table(d$Plot_Row))
##
## 1 2 3 4 5 6 7 8
## 3 4 4 4 10 9 4 4
# --- Prepare treatment variable for model fitting ---
# Re-establish the factor levels for Treatment so that "+P" (control) is the baseline.
# This ensures the model interprets Treatment effects as "-P" relative to "+P".
d$Treatment <- factor(d$Treatment, levels = c("+P", "-P"))
# --- Center the continuous leaf age variable ---
# Convert leaf_tissue (leaf age or developmental stage) to a centered numeric covariate.
# Centering subtracts the mean leaf age so that:
# - the intercept represents abundance at the *average leaf age*,
# - the Treatment coefficient corresponds to the treatment effect at mean leaf age,
# - and collinearity between leaf age and Treatment is minimized.
# --- Verify treatment structure after recoding ---
cat("\nTreatment levels for model:\n")
##
## Treatment levels for model:
print(levels(d$Treatment))
## [1] "+P" "-P"
print(table(d$Treatment))
##
## +P -P
## 15 27
# Extract sample names from counts matrix
sampleNames <- y_filtered_bySample$samples$tube
# Verify all sample names are present
cat("\nAll samples in counts?", all(sampleNames %in% sampleInfo$tube), "\n")
##
## All samples in counts? TRUE
# Match sampleInfo to the order of samples in counts matrix
sampleInfo_matched <- sampleInfo[match(sampleNames, sampleInfo$tube), ]
sampleInfo_matched
## rowid Rep Plot_Row Plot_Column DTA DTS library tube rna_ctn rna_batch1
## 1 3056 6 3 3 77 78 L01_S1_L002 S01 49 1
## 2 3056 6 3 3 77 78 L02_S2_L002 S02 101 1
## 3 3056 6 3 3 77 78 L03_S3_L002 S03 41 1
## 6 3059 6 4 3 76 77 L06_S6_L002 S06 63 1
## 7 3059 6 4 3 76 77 L07_S7_L002 S07 71 1
## 8 3059 6 4 3 76 77 L08_S8_L002 S08 86 1
## 9 3080 7 6 4 76 78 L09_S9_L002 S09 269 1
## 11 3080 7 6 4 76 78 L11_S11_L002 S11 70 1
## 12 3080 7 6 4 76 78 L12_S12_L002 S12 179 1
## 13 3083 7 5 4 78 78 L13_S13_L002 S13 42 1
## 14 3083 7 5 4 78 78 L15_S15_L002 S15 41 1
## 16 3092 5 2 4 75 76 L17_S17_L002 S17 137 2
## 17 3092 5 2 4 75 76 L18_S18_L002 S18 61 2
## 18 3092 5 2 4 75 76 L19_S19_L002 S19 56 2
## 19 3092 5 2 4 75 76 L20_S20_L002 S20 38 2
## 20 3095 5 1 4 74 74 L21_S21_L002 S21 148 2
## 21 3095 5 1 4 74 74 L23_S23_L002 S23 53 2
## 22 3095 5 1 4 74 74 L24_S24_L002 S24 173 2
## 23 3113 11 6 5 80 81 L25_S25_L002 S25 88 2
## 24 3113 11 6 5 80 81 L26_S26_L002 S26 60 2
## 25 3113 11 6 5 80 81 L27_S27_L002 S27 56 2
## 26 3131 11 5 6 75 77 L29_S29_L002 S29 138 2
## 27 3131 11 5 6 75 77 L31_S31_L002 S31 42 2
## 28 3131 11 5 6 75 77 L32_S32_L002 S32 94 2
## 29 4113 11 6 5 73 74 L49_S45_L002 S49 96 4
## 31 4113 11 6 5 73 74 L51_S47_L002 S51 150 4
## 32 4113 11 6 5 73 74 L52_S48_L002 S52 146 4
## 34 4122 12 8 6 71 73 L54_S50_L002 S54 85 4
## 35 4122 12 8 6 71 73 L55_S51_L002 S55 101 4
## 36 4122 12 8 6 71 73 L56_S52_L002 S56 161 4
## 37 4125 12 7 6 69 71 L57_S53_L002 S57 182 4
## 38 4125 12 7 6 69 71 L58_S54_L002 S58 163 4
## 39 4125 12 7 6 69 71 L59_S55_L002 S59 95 4
## 40 4125 12 7 6 69 71 L60_S56_L002 S60 213 4
## 41 4131 11 5 6 69 71 L61_S57_L002 S61 120 4
## 42 4131 11 5 6 69 71 L62_S58_L002 S62 103 4
## 43 4131 11 5 6 69 71 L63_S59_L002 S63 186 4
## 44 4131 11 5 6 69 71 L64_S60_L002 S64 82 4
## tube_n side NCSU_RNA_plant Treatment Genotype leaf_tissue side_tag
## 1 1 L 1 Low_P CTRL 1 #NAME?
## 2 2 L 1 Low_P CTRL 2 #NAME?
## 3 3 L 1 Low_P CTRL 3 #NAME?
## 6 6 L 2 Low_P INV4 2 #NAME?
## 7 7 L 2 Low_P INV4 3 #NAME?
## 8 8 L 2 Low_P INV4 4 #NAME?
## 9 9 L 3 Low_P INV4 1 #NAME?
## 11 11 L 3 Low_P INV4 3 #NAME?
## 12 12 L 3 Low_P INV4 4 #NAME?
## 13 13 L 4 Low_P CTRL 1 #NAME?
## 14 15 L 4 Low_P CTRL 3 #NAME?
## 16 17 L 5 Low_P INV4 1 #NAME?
## 17 18 L 5 Low_P INV4 2 #NAME?
## 18 19 L 5 Low_P INV4 3 #NAME?
## 19 20 L 5 Low_P INV4 4 #NAME?
## 20 21 L 6 Low_P CTRL 1 #NAME?
## 21 23 L 6 Low_P CTRL 3 #NAME?
## 22 24 L 6 Low_P CTRL 4 #NAME?
## 23 25 L 7 Low_P CTRL 1 #NAME?
## 24 26 L 7 Low_P CTRL 2 #NAME?
## 25 27 L 7 Low_P CTRL 3 #NAME?
## 26 29 L 8 Low_P INV4 1 #NAME?
## 27 31 L 8 Low_P INV4 3 #NAME?
## 28 32 L 8 Low_P INV4 4 #NAME?
## 29 49 L 13 High_P CTRL 1 #NAME?
## 31 51 L 13 High_P CTRL 3 #NAME?
## 32 52 L 13 High_P CTRL 4 #NAME?
## 34 54 L 14 High_P CTRL 2 #NAME?
## 35 55 L 14 High_P CTRL 3 #NAME?
## 36 56 L 14 High_P CTRL 4 #NAME?
## 37 57 L 15 High_P INV4 1 #NAME?
## 38 58 L 15 High_P INV4 2 #NAME?
## 39 59 L 15 High_P INV4 3 #NAME?
## 40 60 L 15 High_P INV4 4 #NAME?
## 41 61 L 16 High_P INV4 1 #NAME?
## 42 62 L 16 High_P INV4 2 #NAME?
## 43 63 L 16 High_P INV4 3 #NAME?
## 44 64 L 16 High_P INV4 4 #NAME?
## TIME decimal_time COLLECTOR NOTE S Plot std_count_06102022
## 1 12H 37M 0S 12.61667 SERGIO 1 PlotVI 4
## 2 12H 42M 0S 12.70000 SERGIO 2 PlotVI 4
## 3 12H 43M 0S 12.71667 SERGIO 3 PlotVI 4
## 6 12H 48M 0S 12.80000 SERGIO NA PlotVI 4
## 7 12H 48M 0S 12.80000 SERGIO NA PlotVI 4
## 8 12H 50M 0S 12.83333 SERGIO NA PlotVI 4
## 9 12H 50M 0S 12.83333 SERGIO NA PlotVI 6
## 11 12H 53M 0S 12.88333 SERGIO NA PlotVI 6
## 12 12H 54M 0S 12.90000 SERGIO NA PlotVI 6
## 13 12H 55M 0S 12.91667 SERGIO 5 PlotVI 7
## 14 12H 57M 0S 12.95000 SERGIO ADD BEADS 7 PlotVI 7
## 16 13H 5M 0S 13.08333 RUAIRIDH NA PlotVI 7
## 17 13H 6M 0S 13.10000 RUAIRIDH NA PlotVI 7
## 18 13H 7M 0S 13.11667 RUAIRIDH NA PlotVI 7
## 19 13H 7M 0S 13.11667 RUAIRIDH NA PlotVI 7
## 20 13H 9M 0S 13.15000 RUAIRIDH 9 PlotVI 2
## 21 13H 10M 0S 13.16667 RUAIRIDH 11 PlotVI 2
## 22 13H 11M 0S 13.18333 RUAIRIDH 12 PlotVI 2
## 23 13H 12M 0S 13.20000 RUAIRIDH 13 PlotVI 9
## 24 13H 13M 0S 13.21667 RUAIRIDH 14 PlotVI 9
## 25 13H 16M 0S 13.26667 RUAIRIDH 15 PlotVI 9
## 26 13H 18M 0S 13.30000 RUAIRIDH NA PlotVI 10
## 27 13H 20M 0S 13.33333 RUAIRIDH NA PlotVI 10
## 28 13H 21M 0S 13.35000 RUAIRIDH NA PlotVI 10
## 29 13H 5M 0S 13.08333 SERGIO 25 PlotVIII 3
## 31 13H 7M 0S 13.11667 SERGIO 27 PlotVIII 3
## 32 13H 8M 0S 13.13333 SERGIO 28 PlotVIII 3
## 34 13H 10M 0S 13.16667 SERGIO 30 PlotVIII 6
## 35 13H 11M 0S 13.18333 SERGIO 31 PlotVIII 6
## 36 13H 12M 0S 13.20000 SERGIO ADD BEADS 32 PlotVIII 6
## 37 13H 12M 0S 13.20000 SERGIO NA PlotVIII 6
## 38 13H 14M 0S 13.23333 SERGIO NA PlotVIII 6
## 39 13H 15M 0S 13.25000 SERGIO NA PlotVIII 6
## 40 13H 16M 0S 13.26667 SERGIO NA PlotVIII 6
## 41 13H 17M 0S 13.28333 SERGIO NA PlotVIII 5
## 42 13H 20M 0S 13.33333 SERGIO NA PlotVIII 5
## 43 13H 21M 0S 13.35000 SERGIO NA PlotVIII 5
## 44 13H 22M 0S 13.36667 SERGIO NA PlotVIII 5
## RNA_Q borde notes leaf_number RNA_P leaf_group analysis_sampleID pool
## 1 3 variacion NA 7 26 apical 230113_1_R01 1
## 2 3 variacion NA 7 26 apical 230113_1_R02 2
## 3 3 variacion NA 7 26 basal 230113_1_R03 3
## 6 5 NA 8 16 apical 230113_2_R06 5
## 7 5 NA 8 16 basal 230113_2_R07 2
## 8 5 NA 8 16 basal 230113_2_R08 2
## 9 4 variacion NA 7 22 apical 230113_3_R09 1
## 11 4 variacion NA 7 22 basal 230113_3_R11 2
## 12 4 variacion NA 7 22 basal 230113_3_R12 3
## 13 5 NA 7 18 apical 230113_4_R13 3
## 14 5 NA 7 18 basal 230113_4_R15 3
## 16 4 NA 7 23 apical 230113_5_R17 1
## 17 4 NA 7 23 apical 230113_5_R18 4
## 18 4 NA 7 23 basal 230113_5_R19 2
## 19 4 NA 7 23 basal 230113_5_R20 5
## 20 2 borde NA NA 30 apical 230113_6_R21 3
## 21 2 borde NA NA 30 basal 230113_6_R23 2
## 22 2 borde NA NA 30 basal 230113_6_R24 4
## 23 5 NA 8 20 apical 230113_7_R25 4
## 24 5 NA 8 20 apical 230113_7_R26 3
## 25 5 NA 8 20 basal 230113_7_R27 1
## 26 4 NA 7 25 apical 230113_8_R29 4
## 27 4 NA 7 25 basal 230113_8_R31 2
## 28 4 NA 7 25 basal 230113_8_R32 3
## 29 4 NA 8 10 apical 230113_13_R49 1
## 31 4 NA 8 10 basal 230113_13_R51 4
## 32 4 NA 8 10 basal 230113_13_R52 2
## 34 4 borde NA 8 11 apical 230113_14_R54 4
## 35 4 borde NA 8 11 basal 230113_14_R55 2
## 36 4 borde NA 8 11 basal 230113_14_R56 4
## 37 4 NA 8 12 apical 230113_15_R57 5
## 38 4 NA 8 12 apical 230113_15_R58 3
## 39 4 NA 8 12 basal 230113_15_R59 1
## 40 4 NA 8 12 basal 230113_15_R60 3
## 41 5 NA 8 4 apical 230113_16_R61 4
## 42 5 NA 8 4 apical 230113_16_R62 2
## 43 5 NA 8 4 basal 230113_16_R63 4
## 44 5 NA 8 4 basal 230113_16_R64 1
## top_tag lipid_cluster injection_sample_id injection_order leaf_tissue_c
## 1 R01 1 230113_1_R01 3 -1.5
## 2 R02 1 230113_1_R02 19 -0.5
## 3 R03 1 230113_1_R03 29 0.5
## 6 R06 2 230113_2_R06 52 -0.5
## 7 R07 1 230113_2_R07 14 0.5
## 8 R08 1 230113_2_R08 21 1.5
## 9 R09 1 230113_3_R09 10 -1.5
## 11 R11 1 230113_3_R11 22 0.5
## 12 R12 2 230113_3_R12 34 1.5
## 13 R13 2 230113_4_R13 30 -1.5
## 14 R15 1 230113_4_R15 27 0.5
## 16 R17 1 230113_5_R17 6 -1.5
## 17 R18 2 230113_5_R18 41 -0.5
## 18 R19 1 230113_5_R19 15 0.5
## 19 R20 2 230113_5_R20 53 1.5
## 20 R21 2 230113_6_R21 31 -1.5
## 21 R23 1 230113_6_R23 13 0.5
## 22 R24 2 230113_6_R24 37 1.5
## 23 R25 2 230113_7_R25 43 -1.5
## 24 R26 1 230113_7_R26 26 -0.5
## 25 R27 1 230113_7_R27 5 0.5
## 26 R29 2 230113_8_R29 44 -1.5
## 27 R31 1 230113_8_R31 18 0.5
## 28 R32 1 230113_8_R32 25 1.5
## 29 R49 1 230113_13_R49 8 -1.5
## 31 R51 2 230113_13_R51 38 0.5
## 32 R52 1 230113_13_R52 20 1.5
## 34 R54 2 230113_14_R54 42 -0.5
## 35 R55 1 230113_14_R55 17 0.5
## 36 R56 2 230113_14_R56 45 1.5
## 37 R57 2 230113_15_R57 51 -1.5
## 38 R58 2 230113_15_R58 28 -0.5
## 39 R59 1 230113_15_R59 7 0.5
## 40 R60 2 230113_15_R60 33 1.5
## 41 R61 2 230113_16_R61 46 -1.5
## 42 R62 1 230113_16_R62 16 -0.5
## 43 R63 2 230113_16_R63 40 0.5
## 44 R64 1 230113_16_R64 4 1.5
# CRIionAL: Update Treatment levels to match pheno
# The pheno object has Treatment renamed to "+P"/"-P"
sampleInfo_matched$Treatment <- factor(
pheno$Treatment[match(sampleInfo_matched$tube, pheno$tube)],
levels = c("+P", "-P")
)
# Convert to "counts-like" scale (multiply by 1e9 for numerical stability)
scaled_fractions <- ion_fraction[,sampleInfo_matched$tube] * 1e9
# Create DGEList without normalization
y_molar <- DGEList(counts = scaled_fractions, samples = sampleInfo_matched)
y_molar$samples$norm.factors <- 1
# Design matrix: spatial covariates + biological factors + interaction
# Column correction:
d <- sampleInfo_matched
# Define block: column nested within treatment
block <- interaction(d$Treatment, d$Plot_Column)
design <- model.matrix(
~ Plot_Row + injection_order + leaf_tissue_c*Treatment + Genotype*Treatment + leaf_tissue_c*Genotype,
d
)
cat("\nDesign matrix dimensions:", dim(design), "\n")
##
## Design matrix dimensions: 38 9
cat("\nCoefficients in model:\n")
##
## Coefficients in model:
print(colnames(design))
## [1] "(Intercept)" "Plot_Row"
## [3] "injection_order" "leaf_tissue_c"
## [5] "Treatment-P" "GenotypeINV4"
## [7] "leaf_tissue_c:Treatment-P" "Treatment-P:GenotypeINV4"
## [9] "leaf_tissue_c:GenotypeINV4"
cat("\nDesign matrix summary:\n")
##
## Design matrix summary:
print(head(design))
## (Intercept) Plot_Row injection_order leaf_tissue_c Treatment-P GenotypeINV4
## 1 1 3 3 -1.5 1 0
## 2 1 3 19 -0.5 1 0
## 3 1 3 29 0.5 1 0
## 6 1 4 52 -0.5 1 1
## 7 1 4 14 0.5 1 1
## 8 1 4 21 1.5 1 1
## leaf_tissue_c:Treatment-P Treatment-P:GenotypeINV4 leaf_tissue_c:GenotypeINV4
## 1 -1.5 0 0.0
## 2 -0.5 0 0.0
## 3 0.5 0 0.0
## 6 -0.5 1 -0.5
## 7 0.5 1 0.5
## 8 1.5 1 1.5
# Calculate normalization factors
# y <- calcNormFactors(y_molar)
cat("\nNormalization factors calculated\n")
##
## Normalization factors calculated
cat("Norm factors summary:\n")
## Norm factors summary:
# print(summary(y$samples$norm.factors))
# Apply voom transformation
# Voom transformation with precision weights
voomR <- voom(y_molar, design = design, plot = FALSE)
# --- 4. Estimate consensus correlation for blocks ---
corfit <- duplicateCorrelation(voomR, design, block = block)
# --- 5. Fit linear model accounting for block correlation ---
fit <- lmFit(voomR, design, block = block, correlation = corfit$consensus.correlation)
# --- 6. Apply empirical Bayes moderation ---
ebfit <- eBayes(fit)
cat("\nModel fitted successfully\n")
##
## Model fitted successfully
{
cat("Model fitted:", nrow(fit$coefficients), "genes ×",
ncol(fit$coefficients), "coefficients\n")
cat("\nSignificant genes per coefficient (FDR < 0.05):\n")
print(colSums(abs(decideTests(ebfit))))
}
## Model fitted: 135 genes × 9 coefficients
##
## Significant genes per coefficient (FDR < 0.05):
## (Intercept) Plot_Row
## 126 0
## injection_order leaf_tissue_c
## 51 3
## Treatment-P GenotypeINV4
## 38 0
## leaf_tissue_c:Treatment-P Treatment-P:GenotypeINV4
## 4 0
## leaf_tissue_c:GenotypeINV4
## 0
#' Extract differential abundance results for specified predictors
#'
#' @param ebfit An eBayes fitted model object from limma
#' @param predictor_map Named vector mapping coefficient names to display names
#'
#' @return A data frame with DE results for all specified predictors
extract_predictor_effects <- function(ebfit, predictor_map) {
# Get coefficient names from the model
coef_names <- colnames(coef(ebfit))
# Match predictor names to coefficient positions
coef_indices <- match(names(predictor_map), coef_names)
# Validate all predictors found
if (any(is.na(coef_indices))) {
missing <- names(predictor_map)[is.na(coef_indices)]
stop("Coefficients not found: ", paste(missing, collapse = ", "),
"\nAvailable: ", paste(coef_names, collapse = ", "))
}
# Extract results for each predictor
result_list <- lapply(seq_along(coef_indices), function(i) {
idx <- coef_indices[i]
tt <- topTable(ebfit, coef = idx, sort.by = "none", n = Inf)
# Calculate 95% confidence intervals
crit_value <- qt(0.975, ebfit$df.residual + ebfit$df.prior)
std_errors <- ebfit$stdev.unscaled[, idx] * sqrt(ebfit$s2.post)
tt$upper <- tt$logFC + crit_value * std_errors
tt$lower <- tt$logFC - crit_value * std_errors
tt$predictor <- predictor_map[i]
tt$response <- rownames(tt)
tt$std_err <- std_errors
tt
})
# Combine and format
results<- do.call(rbind, result_list)
rownames(effects) <- NULL
results %>%
dplyr::select(predictor, response, everything()) %>%
arrange(adj.P.Val)
}
# Map coefficients (use names from coef(), not topTable())
predictor_map <- c(
"leaf_tissue_c" = "Leaf",
"Treatment-P" = "-P",
"leaf_tissue_c:Treatment-P" = "Leaf:-P"
)
# Verify coefficient names match
cat("Available coefficients:\n")
## Available coefficients:
print(colnames(coef(ebfit)))
## [1] "(Intercept)" "Plot_Row"
## [3] "injection_order" "leaf_tissue_c"
## [5] "Treatment-P" "GenotypeINV4"
## [7] "leaf_tissue_c:Treatment-P" "Treatment-P:GenotypeINV4"
## [9] "leaf_tissue_c:GenotypeINV4"
effects_df <- extract_predictor_effects(ebfit, predictor_map)
# Summary
cat("\nTotal tests:", nrow(effects_df), "\n")
##
## Total tests: 405
cat("Tests per predictor:\n")
## Tests per predictor:
print(table(effects_df$predictor))
##
## -P Leaf Leaf:-P
## 135 135 135
cat("\nSignificant per predictor (FDR < 0.05):\n")
##
## Significant per predictor (FDR < 0.05):
print(table(effects_df$predictor[effects_df$adj.P.Val < 0.05]))
##
## -P Leaf Leaf:-P
## 38 3 4
#' Classify differential lipids with predictor-specific thresholds
#'
#' Applies two-tier classification:
#' - Tier 1: Significant DLs (FDR < 0.05)
#' - Tier 2: High-confidence DLs (significant + large effect size)
#'
#' Effect size thresholds:
#' - Leaf predictors: |logFC| > 0.5 (equivalent to |3β| > 1.5 across range)
#' - Categorical predictors: |logFC| > 1.5
classify_differential_lipids <- function(effects_df) {
# Define predictor types
leaf_predictors <- c("Leaf", "Leaf:-P")
effects_df %>%
mutate(
# Significance
is_DL = adj.P.Val < 0.05,
# High-confidence based on predictor type
is_hiconf_DL = case_when(
predictor %in% leaf_predictors & is_DL & abs(logFC) > 0.5 ~ TRUE,
predictor =="-P" & is_DL & abs(logFC) > 1.5 ~ TRUE,
.default = FALSE
),
# Direction of effect
regulation = case_when(
!is_hiconf_DL ~ "Not significant",
logFC > 0 ~ "Upregulated",
logFC < 0 ~ "Downregulated",
TRUE ~ "Not significant"
),
# Negative log P-value for plotting
neglogP = -log10(adj.P.Val)
) %>%
mutate(label = if_else(is_DL, response, ""))
}
# Define predictor order
effect_order <- c("Leaf", "-P", "Leaf:-P")
# Apply classification
effects_df <- effects_df %>%
mutate(predictor = factor(predictor, levels = effect_order)) %>%
classify_differential_lipids()
# Format labels for plotting
effects_df$label <- sub("_", "", effects_df$label, perl = TRUE)
effects_df$label <- sub("_", ":", effects_df$label, perl = TRUE)
# Summary statistics
cat("Classification summary:\n")
## Classification summary:
cat("Total tests:", nrow(effects_df), "\n\n")
## Total tests: 405
cat("Tier 1 - Significant DLs (FDR < 0.05):\n")
## Tier 1 - Significant DLs (FDR < 0.05):
print(table(effects_df$predictor, effects_df$is_DL))
##
## FALSE TRUE
## Leaf 132 3
## -P 97 38
## Leaf:-P 131 4
cat("\nTier 2 - High-confidence DLs:\n")
##
## Tier 2 - High-confidence DLs:
print(table(effects_df$predictor, effects_df$is_hiconf_DL))
##
## FALSE TRUE
## Leaf 132 3
## -P 112 23
## Leaf:-P 131 4
cat("\nDirection of significant effects:\n")
##
## Direction of significant effects:
print(with(effects_df %>% filter(is_DL),
table(predictor, regulation)))
## regulation
## predictor Downregulated Not significant Upregulated
## Leaf 0 0 3
## -P 12 15 11
## Leaf:-P 1 0 3
# Custom key glyph for legend
draw_key_custom <- function(data, params, size) {
colour <- data$colour
data <- data[data$colour == colour, ]
grid::segmentsGrob(
0, 1 / 2, 1, 1 / 2,
gp = grid::gpar(
col = data$colour,
lwd = data$linewidth * 2
)
)
}
#' Create volcano plot with staged label placement
#'
#' Plots downregulated labels to the left (xlim < -1.5) and upregulated
#' labels to the right (xlim > 1.5). The -P predictor uses unrestricted
#' labeling.
#'
#' @return A ggplot object with faceted volcano plots
plot_data <- effects_df %>%
droplevels() %>%
inner_join(sp, by = c(response = "colname"))
effects_df$is
## NULL
label_data <- plot_data %>%
filter(is_hiconf_DL)
write.csv(
label_data,
file = file.path(paths$intermediate, "high_confidence_DLs.csv"),
row.names = FALSE
)
force <- 1
label_size <-6
lipid_volcano_3 <- plot_data %>%
ggplot(aes(
x = logFC,
y = -log10(adj.P.Val),
color = head_group,
fill = regulation,
label = label
)) +
ylab(expression(-log[10](italic(FDR)))) +
xlab(expression(log[2]("Fold Change"))) +
geom_point(alpha = 0.7, shape = 21, color = "white") +
scale_fill_manual(
name = "",
values = c("#00AFBB", "grey", "#bb0c00"),
labels = c("Downregulated", "Not significant", "Upregulated")
) +
# Stage 1: Downregulated labels (left side)
geom_text_repel(
data = label_data %>%
filter(predictor == "Leaf", regulation == "Downregulated", !is.na(label), label != ""),
force = force,
fontface = "bold",
size=label_size,
segment.color="grey",
min.segment.length = 0,
xlim = c(NA, -0.5),
max.overlaps = 20,
key_glyph = draw_key_custom
) +
# Stage 2: Upregulated labels (right side)
geom_text_repel(
data = label_data %>%
filter(predictor == "Leaf", regulation == "Upregulated", !is.na(label), label != ""),
force = force,
fontface = "bold",
size=label_size,
segment.color="grey",
min.segment.length = 0,
xlim = c(0.5, NA),
max.overlaps = 20,
key_glyph = draw_key_custom
) +
# Stage 1: Downregulated labels (left side)
geom_text_repel(
data = label_data %>%
filter(predictor == "-P", regulation == "Downregulated", !is.na(label), label != ""),
force = force,
fontface = "bold",
size=label_size,
direction = "y",
segment.color="grey",
min.segment.length = 0,
xlim = c(NA, -4),
max.overlaps = 20,
key_glyph = draw_key_custom
) +
# Stage 2: Upregulated labels (right side)
geom_text_repel(
data = label_data %>%
filter(predictor == "-P", regulation == "Upregulated", !is.na(label), label != ""),
force = force,
fontface = "bold",
size=label_size,
direction = "y",
segment.color="grey",
min.segment.length = 0,
xlim = c(4, NA),
max.overlaps = 20,
key_glyph = draw_key_custom
) +
# Separate: -P predictor without xlim restrictions
geom_text_repel(
data = label_data %>%
filter(predictor == "Leaf:-P", !is.na(label), label != ""),
force = force,
fontface = "bold",
size=label_size,
segment.color="grey",
min.segment.length = 0,
max.overlaps = 20,
key_glyph = draw_key_custom
) +
scale_color_manual(
name = "",
values = c("darkblue", "orange2", "darkred"),
labels = c("Glycolipid", "Neutral lipid", "Phospholipid")
) +
facet_wrap(. ~ predictor, scales = "free_x", ncol = 3) +
guides(
color = guide_legend(override.aes = list(linewidth = 1.5)),
fill = "none"
) +
scale_y_continuous(expand = expansion(mult=c(0,0.1)))+
scale_x_continuous(expand = expansion(mult=c(0.3,0.05)))+
theme_classic2(base_size = 30) +
theme(
plot.title = element_blank(),
# strip.text = element_blank(),
strip.text = element_markdown( size=35),
legend.position = c(0.85, 0.95),
legend.background = element_rect(fill = "transparent"),
#axis.title = element_text(size=25),
axis.title.x = element_blank(),
#legend.position = "top",
legend.spacing = unit(0, "line"),
legend.box.spacing = unit(0, "line"),
legend.text = element_text(size = 25),
axis.title.y = element_text(margin = margin(r = -5)),
plot.margin = margin(5.5, 5.5, 0, 5.5, "pt"),
strip.background = element_blank()
)
# Save growth curve plots
saveRDS(lipid_volcano_3, file.path(paths$intermediate, "lipid_volcano_3.rds"))
print(lipid_volcano_3)
## Volcano Plot: All Three Predictors (Excluding Interaction)
This focused plot excludes Leaf x -P results to better visualize differential lipids in response to leaf tissue position and phosphorus deficiency.
#' Create volcano plot with staged label placement
#'
#' Plots downregulated labels to the left (xlim < -1.5) and upregulated
#' labels to the right (xlim > 1.5). The -P predictor uses unrestricted
#' labeling.
#'
#' @return A ggplot object with faceted volcano plots
plot_data <- effects_df %>%
filter(predictor!="Leaf:-P") %>%
droplevels() %>%
inner_join(sp, by = c(response = "colname"))
force <- 1
dot_size <- 5
volcano_2 <- plot_data %>%
ggplot(aes(
x = logFC,
y = -log10(adj.P.Val),
color = head_group,
fill = regulation,
label = label
)) +
ylab(expression(-log[10](italic(FDR)))) +
xlab(expression(log[2]("Fold Change"))) +
geom_point(alpha = 0.7, shape = 21, color = "white") +
scale_fill_manual(
name = "",
values = c("#00AFBB", "grey", "#bb0c00"),
labels = c("Downregulated", "Not significant", "Upregulated")
) +
# Stage 1: Downregulated labels (left side)
geom_text_repel(
data = plot_data %>%
filter(predictor == "Leaf", regulation == "Downregulated", !is.na(label), label != ""),
force = force,
fontface = "bold",
size=label_size,
segment.color="grey",
min.segment.length = 0,
xlim = c(NA, -0.5),
max.overlaps = 20,
key_glyph = draw_key_custom
) +
# Stage 2: Upregulated labels (right side)
geom_text_repel(
data = plot_data %>%
filter(predictor == "Leaf", regulation == "Upregulated", !is.na(label), label != ""),
force = force,
fontface = "bold",
size=label_size,
segment.color="grey",
min.segment.length = 0,
xlim = c(0.5, NA),
max.overlaps = 20,
key_glyph = draw_key_custom
) +
# Stage 1: Downregulated labels (left side)
geom_text_repel(
data = plot_data %>%
filter(predictor == "-P", regulation == "Downregulated", !is.na(label), label != ""),
force = force,
fontface = "bold",
size=label_size,
direction = "y",
segment.color="grey",
min.segment.length = 0,
xlim = c(NA, -4),
max.overlaps = 20,
key_glyph = draw_key_custom
) +
# Stage 2: Upregulated labels (right side)
geom_text_repel(
data = plot_data %>%
filter(predictor == "-P", regulation == "Upregulated", !is.na(label), label != ""),
force = 2,
fontface = "bold",
size=label_size,
direction = "y",
segment.color="grey",
min.segment.length = 0,
xlim = c(4, NA),
max.overlaps = 20,
key_glyph = draw_key_custom
) +
scale_color_manual(
name = "",
values = c("darkblue", "orange2", "darkred"),
labels = c("Glycolipid", "Neutral lipid", "Phospholipid")
) +
facet_wrap(. ~ predictor, scales = "free_x", ncol = 3) +
guides(
color = guide_legend(override.aes = list(linewidth = 1.5),reverse = TRUE),
fill = "none"
) +
scale_y_continuous(expand = expansion(mult=c(0,0.3)))+
scale_x_continuous(expand = expansion(mult=c(0.3,0.02)))+
theme_classic2(base_size = 30) +
theme(
plot.title = element_blank(),
# strip.text = element_blank(),
strip.text = element_markdown( size=35),
legend.position = c(0.12, 0.85),
legend.background = element_rect(fill = "transparent"),
axis.title = element_text(size=25),
#legend.position = "top",
legend.spacing = unit(0, "line"),
legend.box.spacing = unit(0, "line"),
legend.text = element_text(size = 20),
#legend.direction = "horizontal",
axis.title.y = element_text(margin = margin(r = -5)),
strip.background = element_blank()
)
print(volcano_2)
ggsave(
filename = file.path(paths$figures, "volcano_2_leafxP.png"),
plot = volcano_2,
height = 3,
width = 5,
units = "in",
dpi = 150
)
#' Create volcano plot for Leaf:-P interaction
#'
#' Plots labels without xlim restrictions for the Leaf:-P interaction effect.
#'
#' @return A ggplot object for Leaf:-P volcano plot
plot_data_leafP <- effects_df %>%
filter(predictor == "Leaf:-P") %>%
droplevels() %>%
inner_join(sp, by = c(response = "colname"))
force <- 1
volcano_leafP <- plot_data_leafP %>%
ggplot(aes(
x = logFC,
y = -log10(adj.P.Val),
color = head_group,
fill = regulation,
label = label
)) +
ylab(expression(-log[10](italic(FDR)))) +
xlab(expression(log[2]("Fold Change"))) +
ylim(0, 5) +
geom_point(alpha = 0.7, shape = 21, color = "white") +
scale_fill_manual(
name = "",
values = c("#00AFBB", "grey", "#bb0c00"),
labels = c("Downregulated", "Not significant", "Upregulated")
) +
# Downregulated labels (no xlim restriction)
geom_text_repel(
data = plot_data_leafP %>%
filter(regulation == "Downregulated", !is.na(label), label != ""),
force = force,
fontface = "bold",
size = 7,
segment.color = "grey",
min.segment.length = 0,
max.overlaps = 20,
key_glyph = draw_key_custom
) +
# Upregulated labels (no xlim restriction)
geom_text_repel(
data = plot_data_leafP %>%
filter(regulation == "Upregulated", !is.na(label), label != ""),
force = force,
fontface = "bold",
size = 7,
segment.color = "grey",
min.segment.length = 0,
max.overlaps = 20,
key_glyph = draw_key_custom
) +
scale_color_manual(
name = "",
values = c("orange2", "darkred", "darkblue" ),
labels = c("Glycolipid", "Neutral lipid", "Phospholipid")
) +
guides(
color = guide_legend(override.aes = list(linewidth = 1.5), reverse = TRUE),
fill = "none"
) +
scale_y_continuous(expand = expansion(mult = c(0, 0.3))) +
theme_classic2(base_size = 30) +
theme(
plot.title = element_blank(),
# strip.text = element_blank(),
strip.text = element_markdown( size=35),
legend.position = "none",
legend.background = element_rect(fill = "transparent"),
axis.title = element_text(size = 25),
legend.spacing = unit(0, "line"),
legend.box.spacing = unit(0, "line"),
legend.text = element_text(size = 20),
axis.title.y = element_text(margin = margin(r = -5)),
strip.background = element_blank()
)
print(volcano_leafP)
ggsave(
filename = file.path(paths$figures, "volcano_leafP.png"),
plot = volcano_leafP,
height = 7,
width = 7,
units = "in",
dpi = 150
)
# Filter significant effects and prepare for plotting
to_plot <- effects_df %>%
mutate(response = gsub("_glyco", "", response)) %>%
mutate(predictor = factor(
predictor,
levels = c("Leaf", "-P", "Leaf:-P","Inv4m", "-P:Inv4m")
)) %>%
filter(is_hiconf_DL) %>%
inner_join(sp, by = c(response = "colname")) %>%
mutate(sign = sign(logFC)) %>%
ungroup() %>%
group_by(response) %>%
mutate(max_effect = max(abs(logFC))) %>%
ungroup() %>%
group_by(predictor, head_group, class) %>%
mutate(head_group = fct_reorder(head_group, adj.P.Val)) %>%
ungroup() %>%
group_by(predictor) %>%
arrange(predictor, head_group, class, adj.P.Val) %>%
mutate(.r = row_number()) %>%
ungroup()
cat("\nSignificant effects per predictor:\n")
##
## Significant effects per predictor:
print(with(to_plot, table(predictor, head_group)))
## head_group
## predictor phospholipid glycolipid neutral
## Leaf 3 0 0
## -P 10 6 7
## Leaf:-P 1 0 3
## Inv4m 0 0 0
## -P:Inv4m 0 0 0
pd <- position_dodge(0.4)
effect_plot_all <- to_plot %>%
droplevels() %>%
mutate(response = fct_reorder(response, .r)) %>%
ungroup() %>%
ggplot(aes(
x = logFC,
y = response,
col = head_group
)) +
xlab("Effect (log2 Fold Change)") +
geom_vline(xintercept = 0, lty = 2) +
geom_point(position = pd, size = 3) +
geom_errorbar(
aes(xmin = upper, xmax = lower),
position = pd,
width = 0.2,
linewidth = 0.7
) +
guides(
shape = guide_legend(title = NULL),
color = guide_legend(reverse = TRUE)
) +
facet_wrap(. ~ predictor, scales = "free", ncol = 4) +
scale_y_discrete(limits = rev) +
scale_color_manual(values = c("blue", "red", "chartreuse")) +
theme_light(base_size = 15) +
theme(
legend.position = "top",
strip.background = element_rect(fill = "white"),
strip.text = element_text(color = "black", size = 15),
axis.title.y = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major.y = element_blank()
)
print(effect_plot_all)
# Export differential abundance results
write.csv(
voomR$E,
file = file.path(paths$intermediate, "voom_normalised_lipids.csv"),
row.names = TRUE
)
# Export differential abundance results
write.csv(
effects_df,
file = file.path(paths$intermediate, "lipid_differential_abundance_results.csv"),
row.names = FALSE
)
# Save volcano plots (alternative export - disabled)
# ggsave(
# volcano_3,
# file = file.path(paths$figures, "lipid_volcano_all.svg"),
# height = 6,
# width = 12
# )
ggsave(
volcano_leafP,
file = file.path(paths$figures, "lipid_volcano_leaf_P.svg"),
height = 6,
width = 10
)
ggsave(
effect_plot_all,
file = file.path(paths$figures, "lipid_effects_all.svg"),
height = 10,
width = 12
)
# Count differential lipids by class and regulation
summary_table <- effects_df %>%
filter(is_DL) %>%
inner_join(sp, by = c(response = "colname")) %>%
group_by(predictor, head_group, regulation) %>%
summarise(n_lipids = n(), .groups = "drop") %>%
arrange(predictor, head_group, regulation)
print(summary_table)
## # A tibble: 10 × 4
## predictor head_group regulation n_lipids
## <fct> <chr> <chr> <int>
## 1 Leaf phospholipid Upregulated 3
## 2 -P glycolipid Not significant 8
## 3 -P glycolipid Upregulated 6
## 4 -P neutral Downregulated 2
## 5 -P neutral Not significant 1
## 6 -P neutral Upregulated 5
## 7 -P phospholipid Downregulated 10
## 8 -P phospholipid Not significant 6
## 9 Leaf:-P neutral Upregulated 3
## 10 Leaf:-P phospholipid Downregulated 1
# Significant lipids per predictor
sig_summary <- effects_df %>%
group_by(predictor) %>%
summarise(
total_tested = n(),
n_significant = sum(is_DL),
n_upregulated = sum(is_DL & logFC>0),
n_downregulated = sum(is_DL & logFC<0),
pct_significant = round(100 * n_significant / total_tested, 1)
)
print(sig_summary)
## # A tibble: 3 × 6
## predictor total_tested n_significant n_upregulated n_downregulated
## <fct> <int> <int> <int> <int>
## 1 Leaf 135 3 3 0
## 2 -P 135 38 20 18
## 3 Leaf:-P 135 4 3 1
## # ℹ 1 more variable: pct_significant <dbl>
library(dplyr)
# Prepare lipid data (high-confidence only)
lipid_table_data <- to_plot %>%
filter(is_hiconf_DL) %>%
mutate(
logFC = round(logFC, digits = 2),
std_err = round(std_err, digits = 2),
neglogP = round(-log10(adj.P.Val), digits = 2),
predictor_display = case_when(
predictor == "-P" ~ "Phosphorus Deficiency (-P)",
predictor == "Leaf" ~ "Leaf Tissue Position (Leaf)",
predictor == "Leaf:-P" ~ "Leaf × Phosphorus Interaction (Leaf:-P)",
TRUE ~ as.character(predictor)
)
) %>%
arrange(predictor, desc(regulation == "Upregulated"), desc(neglogP))
# Function to generate LaTeX
create_lipid_table <- function(df) {
lines <- c()
lines <- c(lines, "\\begin{table}[h!]")
lines <- c(lines, "\\centering")
lines <- c(lines, "\\footnotesize")
lines <- c(lines, "\\caption{High-confidence differentially abundant lipids across experimental factors, annotated with IUB nomenclature and lipid class.}")
lines <- c(lines, "\\label{table::differential_lipids}")
lines <- c(lines, "\\begin{tabular}{|c|c|c|c|}") # 4 columns now
lines <- c(lines, "\\hline")
lines <- c(lines, "\\textbf{Lipid (IUB)} & \\textbf{Class} & \\textbf{$-\\log_{10}(\\textit{FDR})$} & \\textbf{$\\log_2(\\text{FC})$}\\\\")
lines <- c(lines, "\\hline")
for (pred in unique(df$predictor)) {
pred_data <- df %>% filter(predictor == pred)
pred_name <- pred_data$predictor_display[1]
# Predictor section
lines <- c(lines, sprintf("\\multicolumn{4}{|l|}{\\textit{\\textbf{%s}}} \\\\", pred_name))
lines <- c(lines, "\\hline")
# Upregulated
up_data <- pred_data %>% filter(regulation == "Upregulated")
if (nrow(up_data) > 0) {
lines <- c(lines, "\\multicolumn{4}{|l|}{\\textit{\\textbf{Upregulated Lipids}}} \\\\")
lines <- c(lines, "\\hline")
for (i in 1:nrow(up_data)) {
row <- up_data[i, ]
lines <- c(lines, sprintf(
"%s & %s & %.1f & %.2f\\\\",
row$label, row$head_group, row$neglogP, row$logFC,row$std_err
))
}
}
# Downregulated
down_data <- pred_data %>% filter(regulation == "Downregulated")
if (nrow(down_data) > 0) {
lines <- c(lines, "\\multicolumn{4}{|l|}{\\textit{\\textbf{Downregulated Lipids}}} \\\\")
lines <- c(lines, "\\hline")
for (i in 1:nrow(down_data)) {
row <- down_data[i, ]
lines <- c(lines, sprintf(
"%s & %s & %.1f & %.2f\\\\",
row$label, row$head_group, row$neglogP, row$logFC,row$std_err
))
}
}
lines <- c(lines, "\\hline")
}
lines <- c(lines, "\\end{tabular}")
lines <- c(lines, "\\end{table}")
return(paste(lines, collapse = "\n"))
}
# Generate and save
latex_table <- create_lipid_table(lipid_table_data)
writeLines(latex_table, con = file.path(paths$tables, "differential_lipids_table.tex"))
cat("✅ Final lipid table (4 columns, no Notes) saved to:", file.path(paths$tables, "differential_lipids_table.tex"), "\n")
## ✅ Final lipid table (4 columns, no Notes) saved to: /Users/fvrodriguez/Library/CloudStorage/GoogleDrive-frodrig4@ncsu.edu/My Drive/repos/inv4m/results/phosphorus_paper/tables/differential_lipids_table.tex
sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sequoia 15.6.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggtext_0.1.2 ggpubr_0.6.2 cowplot_1.2.0 ggrepel_0.9.6 forcats_1.0.1
## [6] tidyr_1.3.1 dplyr_1.1.4 ggplot2_4.0.1 edgeR_4.8.0 limma_3.66.0
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.6 xfun_0.54 bslib_0.9.0 rstatix_0.7.3
## [5] lattice_0.22-7 vctrs_0.6.5 tools_4.5.1 generics_0.1.4
## [9] tibble_3.3.0 pkgconfig_2.0.3 RColorBrewer_1.1-3 S7_0.2.1
## [13] lifecycle_1.0.4 stringr_1.6.0 compiler_4.5.1 farver_2.1.2
## [17] textshaping_1.0.4 statmod_1.5.1 carData_3.0-5 litedown_0.8
## [21] htmltools_0.5.9 sass_0.4.10 yaml_2.3.12 Formula_1.2-5
## [25] pillar_1.11.1 car_3.1-3 jquerylib_0.1.4 cachem_1.1.0
## [29] abind_1.4-8 commonmark_2.0.0 tidyselect_1.2.1 locfit_1.5-9.12
## [33] digest_0.6.39 stringi_1.8.7 purrr_1.2.0 labeling_0.4.3
## [37] rprojroot_2.1.1 fastmap_1.2.0 grid_4.5.1 here_1.0.2
## [41] colorspace_2.1-2 cli_3.6.5 magrittr_2.0.4 utf8_1.2.6
## [45] broom_1.0.11 withr_3.0.2 scales_1.4.0 backports_1.5.0
## [49] rmarkdown_2.30 ggsignif_0.6.4 ragg_1.5.0 evaluate_1.0.5
## [53] knitr_1.50 viridisLite_0.4.2 markdown_2.0 rlang_1.1.6
## [57] gridtext_0.1.5 Rcpp_1.1.0 glue_1.8.0 xml2_1.5.1
## [61] svglite_2.2.2 jsonlite_2.0.0 R6_2.6.1 systemfonts_1.3.1